* Start a fresh text log for this script, first closing any log named
* "sublog" that an earlier (possibly interrupted) run left open
cap log close sublog
log using "$projdir/log/3_qwi_synth.txt", text replace name(sublog)

*-------------------------------------------------------------------------------
* Description: Estimate the uncorrected, bias-corrected, and pandemic-corrected
* synthetic control results for individual counties using QWI data for CA and NY
* Author: Denis Sosinskiy
*
* Updated: August 28, 2023
*-------------------------------------------------------------------------------
clear

*---------------------------------------------------------------------------
* California
*---------------------------------------------------------------------------

* Create the output folders for each outcome and empty them of results saved
* by earlier runs. The industry code and sample name are fixed in this script.
local n = 7225
local s = "allage"
foreach y in sepr sepnewr {

	* Folder that receives the uncorrected results; the corrected results go
	* into its "corrected" subfolder
	local statedir "$projdir/dta/analysis/qwi/`y'/naics`n'/`s'/California"

	* Ensure necessary folders exist. Each level is created in turn, and
	* capture ignores the error raised when a folder is already there.
	capture mkdir "$projdir/dta/analysis/qwi"
	capture mkdir "$projdir/dta/analysis/qwi/`y'"
	capture mkdir "$projdir/dta/analysis/qwi/`y'/naics`n'"
	capture mkdir "$projdir/dta/analysis/qwi/`y'/naics`n'/`s'"
	capture mkdir "`statedir'"
	capture mkdir "`statedir'/corrected"

	* Clear the old .dta files (if any) from both folders
	di "Erasing previously saved outcome data"
	di "..."
	di
	foreach folder in "`statedir'" "`statedir'/corrected" {
		local filelist : dir "`folder'" files "*.dta"
		* Compound quotes are needed because the list holds quoted file names
		di `"Filelist: `filelist'"'
		foreach f of local filelist {
			qui erase "`folder'/`f'"
		}
	}
}

* Create tempfiles
tempfile core core0 core1 core2 core3 core4

* Set the treatment quarter and the first and last quarters of the sample.
* Values are Stata quarterly dates (quarters since 1960q1):
* 218 = 2014q3, 199 = 2009q4, 249 = 2022q2.
local trp = 218
local start_tm = 199
local end_tm = 249
* Number of quarters a county must be observed for to form a balanced panel
local tot_tm = `end_tm' - `start_tm' + 1
local endqtrlab "2022Q2"

* Ensure the distinct package is installed. It is only downloaded when it is
* missing, so the script does not contact SSC on every run. A failed install
* is reported but does not stop the script here.
capture which distinct
if _rc {
	capture noisily ssc install distinct
}

* Load the all-ages QWI panel
use "$projdir/dta/build/cln/qwi_allage.dta", clear

* Flag potential donors: every county whose state FIPS code is not 6
* (California). Rows added by the merges below have donor missing.
qui gen donor = floor(cty_fips/1000) != 6
sort cty_fips

* Mark the counties in the QCEW donor pool (match status kept in merge_donor)
merge m:1 cty_fips using "$projdir/dta/build/cln/donor_cty.dta", generate(merge_donor)

* Mark the counties in the QCEW treated pool (match status kept in _merge)
merge m:1 cty_fips using "$projdir/dta/build/cln/treated_cty.dta"

* Drop every row that is neither a matched donor-pool county nor a matched
* treated-pool county located in California
drop if merge_donor != 3 & !(_merge == 3 & donor == 0)

* List the California counties that remain, build a comma-separated copy of
* the list for use inside inlist(), and count them
levelsof cty_fips if floor(cty_fips/1000) == 6, local(trcty)
local trctylist : subinstr local trcty " " ",", all
qui distinct cty_fips if inlist(cty_fips, `trctylist')
local trcty_count = r(ndistinct)

* Store the working panel
qui save "`core'", replace

* Merge with pop10
* Step 1: build a one-row-per-county file holding the mean of pop10 over the
* non-missing rows of the county analysis panel
use "$projdir/dta/build/cln/analysis_panel_cty.dta", clear
qui gen cty_fips = 1000*statefips + countyfips
qui keep if !mi(cty_fips)
keep if !mi(pop10)
collapse pop10, by(cty_fips)
qui save "`core0'", replace

* Step 2: attach pop10 to the working panel. keep(1 3) retains every row of
* the working panel, matched or not, and drops counties found only in the
* population file. The tempfile name is quoted so that a temporary-folder
* path containing spaces does not break the merge.
qui use "`core'", clear
merge m:1 cty_fips using "`core0'", nogen keep(1 3)

* Save tempfile
qui compress
qui save "`core'", replace

* The county-change adjustment do-file is applied to the working panel three
* times; each pass aggregates a different set of variables, and the three
* aggregates are then combined.

* Pass 1: empbeg-weighted means of earn, donor, year, and samp_allage by
* county and quarter. The donor flag must still be exactly 0 or 1 afterwards.
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (mean) earn donor year samp_allage [aweight=empbeg], by(cty_fips tm)
assert donor == 0 | donor == 1
quietly save "`core2'", replace

* Pass 2: employment-weighted means of the two separation-rate outcomes by
* county and quarter
quietly use "`core'", clear
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (mean) sepr sepnewr [aweight=employment], by(cty_fips tm)
quietly save "`core3'", replace

* Pass 3: sums of employment, pop10, and emp10 by county, quarter, and industry
quietly use "`core'", clear
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (sum) employment pop10 emp10, by(cty_fips tm naics)

* Combine the three aggregates. The 1:1 merges on (cty_fips, tm) only succeed
* when the data in memory hold a single row per county-quarter, i.e. one
* industry code per county-quarter.
quietly merge 1:1 cty_fips tm using "`core2'", nogenerate noreport
quietly merge 1:1 cty_fips tm using "`core3'", nogenerate noreport

* Restrict to the sample window and keep only the counties that are observed
* in every one of its quarters
qui keep if tm >= `start_tm' & tm <= `end_tm'
bysort cty_fips: gen ct = _N
qui drop if ct != `tot_tm'

* Attach the county-year LAUS data, keeping matched rows only
qui merge m:1 cty_fips year using "$projdir/dta/build/cln/laus_cty_NYCnocombine.dta", nogenerate noreport keep(match)

* Build the county-level Covid-impact index (the simple average of the
* workplace and retail measures) while the panel is set aside, then attach it,
* again keeping matched counties only
preserve
	qui use "$projdir/dta/build/cln/covid_index.dta", clear
	qui rename countyfips cty_fips
	qui gen covid_index = (workplace + retail)/2
	qui keep cty_fips covid_index
	qui save "`core2'", replace
restore
merge m:1 cty_fips using "`core2'", nogenerate noreport keep(match)
qui save "`core'", replace

* Reduce to the variables needed below and store the result
qui keep cty_fips tm naics employment sepr sepnewr emp10 pop10 unr covid_index donor samp_allage
qui save "`core0'", replace

* Normalise and debias variables
local n = 7225
qui use "`core0'", clear

* Put employment and the two separation rates in % of their level in the
* quarter before treatment (`trp' - 1, i.e. 217 = 2014q2). The base quarter is
* derived from the treatment quarter so that the two cannot drift apart.
local basetm = `trp' - 1
foreach v in employment sepr sepnewr {
	* x holds the base-quarter value on that quarter's row only; the by-county
	* maximum then copies it to every row of the county
	qui gen x = `v' if tm == `basetm'
	bysort cty_fips: egen xx = max(x)
	qui replace `v' = 100*`v'/xx
	qui drop x xx
}

* Reshape data to debias it: for every quarter of the predictor window
* (`start_tm' to `predend') create one county-constant variable per outcome
* holding that quarter's value, and collect the names in predvars
local predvars ""
local predend = 207
forvalues t = `start_tm'/`predend' {
	foreach v in employment sepr sepnewr {
		qui gen x = `v' if tm == `t'
		bysort cty_fips: egen `v'_`t' = max(x)
		qui drop x
		local predvars "`predvars' `v'_`t'"
	}
}

* Add each outcome's county mean over the predictor window
foreach v in employment sepr sepnewr {
	bysort cty_fips: egen x = mean(`v') if inrange(tm, `start_tm', `predend')
	bysort cty_fips: egen mean_`v' = max(x)
	qui drop x
	local predvars "`predvars' mean_`v'"
}

* Snapshot of the predictor data (this tempfile is not read again in this script)
save "`core1'", replace

* "Covid- and bias-correct" the outcomes of interest
* Containers for the corrected outcomes
foreach v in employment sepr sepnewr {
	qui gen p_`v' = .
}

* For every outcome and every quarter, regress the outcome on the predictor
* set and the Covid index using donor counties only (weighted by pop10), and
* store the residual. predict is run without an if-condition, so residuals
* are also computed for the non-donor counties.
qui levelsof tm, local(quarters)
foreach v in employment sepr sepnewr {
	foreach t of local quarters {
		tempvar resid
		qui regress `v' `predvars' covid_index [aweight=pop10] if tm == `t' & donor == 1
		qui predict `resid', residuals
		qui replace p_`v' = `resid' if tm == `t'
		drop `resid'
	}
}

* Drop the predictor variables, keeping the raw and corrected outcomes
qui keep cty_fips tm naics employment sepr sepnewr p_* emp10 pop10 unr donor samp_allage

* Compress, order by county and quarter, and store as the working panel
qui compress
sort cty_fips tm
qui save "`core'", replace

* Get the synthetic control weights for each treated county
qui use "`core'", clear
qui levelsof cty_fips if donor == 0, local(trcty)

* Settings shared by every run: sample name, industry code, and the last
* quarter used as a predictor
local s = "allage"
local n = 7225
local predend = 207

* For every treated county `c' and outcome `y':
*   1. run allsynth on the treated county plus the donor pool and save its
*      results as tr_`c' in the California folder;
*   2. read the donor weights back from that file and use them to average the
*      corrected outcome p_`y' over the donors of each unit;
*   3. save the corrected treated series, the corrected synthetic series, and
*      their gap as tr_`c' in the "corrected" subfolder.
foreach c of local trcty {
	foreach y in sepr sepnewr {

		* Re-load the data
		qui use "`core'", clear

		* Restrict to selected sample of counties observed for NAICS `n', and ensure a balanced panel
		qui keep if samp_`s' == 1
		qui keep if naics == `n'
		qui keep if cty_fips == `c' | donor == 1
		bysort cty_fips: gen N = _N
		qui keep if N == `tot_tm'
		drop N

		* Report how many treated and donor counties survive the restrictions
		qui distinct cty_fips if donor == 1
		local donorct = r(ndistinct)
		qui distinct cty_fips if inlist(cty_fips, `trctylist')
		di "Balanced panel exists for `r(ndistinct)' of `trcty_count' treated counties and `donorct' donor pool counties"

		* Create population-based weights for the averaging
		* NOTE(review): x and popwt are not used again in this script; they are
		* kept in the data handed to allsynth in case its output is expected to
		* carry them - confirm before removing
		qui gen x = pop10
		bysort cty_fips: egen popwt = max(x)

		* Create the list of pre-treatment outcome period predictors: the
		* outcome and emp10 in every single quarter of the predictor window,
		* followed by emp10 averaged over the whole window
		local ypreds
		local emp10preds
		forvalues t = `start_tm'/`predend' {
			local ypreds "`ypreds' `y'(`t')"
			local emp10preds "`emp10preds' emp10(`t')"
		}
		local emp10preds "`emp10preds' emp10(`start_tm'(1)`predend')"
		* Variable normalised together with the outcome in transform()
		local normemp "emp10"

		* tsset
		format tm %tq
		tsset cty_fips tm, quarterly
		qui gen treated = (donor == 0)
		* Treatment quarter for treated rows, 0 otherwise (taken from the trp
		* local rather than repeating its value)
		qui gen trp = `trp'*treated

		#delimit ;
			allsynth
				`y' `ypreds' `y'(`start_tm'(1)`predend') unr(`start_tm'(4)`predend') `emp10preds', 
				trunit(`c') trperiod(`trp') 
				bcorrect(merge) 
				transform(`y' `normemp', normalize)
				pvalues(rmspe)
				keep($projdir/dta/analysis/qwi/`y'/naics`n'/`s'/California/tr_`c') replace;
			#delimit cr

		* Load the estimates data
		qui use "$projdir/dta/analysis/qwi/`y'/naics`n'/`s'/California/tr_`c'", clear

		* Get the weights: keep the strictly positive ones, then rename so that
		* county_fips identifies the unit the weights belong to and cty_fips
		* identifies the weighted donor (ready to merge onto the panel)
		qui keep cty_fips _Co_Number _W_Weight
		qui keep if !mi(_W_Weight) & _W_Weight > 0
		qui rename cty_fips county_fips
		qui rename _Co_Number cty_fips
		qui levelsof county_fips, local(units)
		qui gettoken first : units
		qui save "`core2'", replace

		* For each unit, merge the weights into the core data. core4
		* accumulates the donor rows; on the first unit it is overwritten rather
		* than appended to, which discards the rows left by the previous run.
		foreach i of local units {
			qui use "`core2'", clear
			qui keep if county_fips == `i'
			qui save "`core3'", replace
			qui use "`core'", clear
			qui keep if naics == `n'
			qui keep cty_fips tm `y' p_`y'
			merge m:1 cty_fips using "`core3'", nogen norep keep(3)
			if `i' != `first' {
				qui append using "`core4'"
			}
			qui save "`core4'", replace
		}

		* Collapse to get the synthetic unit estimates for the treated unit and the donor pool units and save
		qui collapse (mean) p_`y'_synthetic=p_`y' [aw=_W_Weight], by(county_fips tm)
		qui rename county_fips cty_fips
		qui save "`core2'", replace

		* Merge into core data for the treated unit, and save
		qui use "`core'", clear
		qui keep if naics == `n'
		qui keep cty_fips tm p_`y' pop10
		qui rename p_`y' p_`y'_treated
		qui merge 1:1 cty_fips tm using "`core2'", nogen norep
		qui rename tm _time
		qui gen p_gap = p_`y'_treated - p_`y'_synthetic
		qui gen trunit = `c'
		qui gen trperiod = `trp'
		qui save "$projdir/dta/analysis/qwi/`y'/naics`n'/`s'/California/corrected/tr_`c'", replace
	}
}

*---------------------------------------------------------------------------
* NY counties
*---------------------------------------------------------------------------
clear

* Create the output folders for each outcome and empty them of results saved
* by earlier runs. The industry code and sample name are fixed in this script.
local n = 7225
local s = "allage"
foreach y in sepr sepnewr {

	* Folder that receives the uncorrected results; the corrected results go
	* into its "corrected" subfolder
	local statedir "$projdir/dta/analysis/qwi/`y'/naics`n'/`s'/NY"

	* Ensure necessary folders exist. Each level is created in turn, and
	* capture ignores the error raised when a folder is already there.
	capture mkdir "$projdir/dta/analysis/qwi"
	capture mkdir "$projdir/dta/analysis/qwi/`y'"
	capture mkdir "$projdir/dta/analysis/qwi/`y'/naics`n'"
	capture mkdir "$projdir/dta/analysis/qwi/`y'/naics`n'/`s'"
	capture mkdir "`statedir'"
	capture mkdir "`statedir'/corrected"

	* Clear the old .dta files (if any) from both folders
	di "Erasing previously saved outcome data"
	di "..."
	di
	foreach folder in "`statedir'" "`statedir'/corrected" {
		local filelist : dir "`folder'" files "*.dta"
		* Compound quotes are needed because the list holds quoted file names
		di `"Filelist: `filelist'"'
		foreach f of local filelist {
			qui erase "`folder'/`f'"
		}
	}
}

* Create tempfiles
tempfile core core0 core1 core2 core3 core4

* Set the treatment quarter and the first and last quarters of the sample.
* Values are Stata quarterly dates (quarters since 1960q1):
* 216 = 2014q1, 199 = 2009q4, 249 = 2022q2.
local trp = 216
local start_tm = 199
local end_tm = 249
* Number of quarters a county must be observed for to form a balanced panel
local tot_tm = `end_tm' - `start_tm' + 1
local endqtrlab "2022Q2"

* Ensure the distinct package is installed. It is only downloaded when it is
* missing, so the script does not contact SSC on every run. A failed install
* is reported but does not stop the script here.
capture which distinct
if _rc {
	capture noisily ssc install distinct
}

* Load the all-ages QWI panel
use "$projdir/dta/build/cln/qwi_allage.dta", clear

* Flag potential donors: every county whose state FIPS code is not 36
* (New York). Rows added by the merges below have donor missing.
qui gen donor = floor(cty_fips/1000) != 36
sort cty_fips

* Mark the counties in the QCEW donor pool (match status kept in merge_donor)
merge m:1 cty_fips using "$projdir/dta/build/cln/donor_cty.dta", generate(merge_donor)

* Mark the counties in the QCEW treated pool (match status kept in _merge)
merge m:1 cty_fips using "$projdir/dta/build/cln/treated_cty.dta"

* Drop every row that is neither a matched donor-pool county nor a matched
* treated-pool county located in New York
drop if merge_donor != 3 & !(_merge == 3 & donor == 0)

* List the New York counties that remain, build a comma-separated copy of
* the list for use inside inlist(), and count them
levelsof cty_fips if floor(cty_fips/1000) == 36, local(trcty)
local trctylist : subinstr local trcty " " ",", all
qui distinct cty_fips if inlist(cty_fips, `trctylist')
local trcty_count = r(ndistinct)

* Store the working panel
qui save "`core'", replace

* Merge with pop10
* Step 1: build a one-row-per-county file holding the mean of pop10 over the
* non-missing rows of the county analysis panel
use "$projdir/dta/build/cln/analysis_panel_cty.dta", clear
qui gen cty_fips = 1000*statefips + countyfips
qui keep if !mi(cty_fips)
keep if !mi(pop10)
collapse pop10, by(cty_fips)
qui save "`core0'", replace

* Step 2: attach pop10 to the working panel. keep(1 3) retains every row of
* the working panel, matched or not, and drops counties found only in the
* population file. The tempfile name is quoted so that a temporary-folder
* path containing spaces does not break the merge.
qui use "`core'", clear
merge m:1 cty_fips using "`core0'", nogen keep(1 3)

* Save tempfile
qui compress
qui save "`core'", replace

* The county-change adjustment do-file is applied to the working panel three
* times; each pass aggregates a different set of variables, and the three
* aggregates are then combined.

* Pass 1: empbeg-weighted means of earn, donor, year, and samp_allage by
* county and quarter. The donor flag must still be exactly 0 or 1 afterwards.
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (mean) earn donor year samp_allage [aweight=empbeg], by(cty_fips tm)
assert donor == 0 | donor == 1
quietly save "`core2'", replace

* Pass 2: employment-weighted means of the two separation-rate outcomes by
* county and quarter
quietly use "`core'", clear
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (mean) sepr sepnewr [aweight=employment], by(cty_fips tm)
quietly save "`core3'", replace

* Pass 3: sums of employment, pop10, and emp10 by county, quarter, and industry
quietly use "`core'", clear
quietly do "$projdir/do/build/cw/cty_changes_NYCnocombine.do"
quietly collapse (sum) employment pop10 emp10, by(cty_fips tm naics)

* Combine the three aggregates. The 1:1 merges on (cty_fips, tm) only succeed
* when the data in memory hold a single row per county-quarter, i.e. one
* industry code per county-quarter.
quietly merge 1:1 cty_fips tm using "`core2'", nogenerate noreport
quietly merge 1:1 cty_fips tm using "`core3'", nogenerate noreport

* Restrict to the sample window and keep only the counties that are observed
* in every one of its quarters
qui keep if tm >= `start_tm' & tm <= `end_tm'
bysort cty_fips: gen ct = _N
qui drop if ct != `tot_tm'

* Attach the county-year LAUS data, keeping matched rows only
qui merge m:1 cty_fips year using "$projdir/dta/build/cln/laus_cty_NYCnocombine.dta", nogenerate noreport keep(match)

* Store the panel, then build the county-level Covid-impact index (the simple
* average of the workplace and retail measures) while the panel is set aside,
* and attach it, again keeping matched counties only
qui save "`core'", replace
preserve
	qui use "$projdir/dta/build/cln/covid_index.dta", clear
	qui rename countyfips cty_fips
	qui gen covid_index = (workplace + retail)/2
	qui keep cty_fips covid_index
	qui save "`core2'", replace
restore
merge m:1 cty_fips using "`core2'", nogenerate noreport keep(match)

* Reduce to the variables needed below and store the result
qui keep cty_fips tm naics employment sepr sepnewr emp10 pop10 unr covid_index donor samp_allage
qui save "`core0'", replace

* Normalise and debias variables
local n = 7225
qui use "`core0'", clear

* Put employment and the two separation rates in % of their level in the
* quarter before treatment (`trp' - 1, i.e. 215 = 2013q4). The base quarter is
* derived from the treatment quarter so that the two cannot drift apart.
local basetm = `trp' - 1
foreach v in employment sepr sepnewr {
	* x holds the base-quarter value on that quarter's row only; the by-county
	* maximum then copies it to every row of the county
	qui gen x = `v' if tm == `basetm'
	bysort cty_fips: egen xx = max(x)
	qui replace `v' = 100*`v'/xx
	qui drop x xx
}

* Reshape data to debias it: for every quarter of the predictor window
* (`start_tm' to `predend') create one county-constant variable per outcome
* holding that quarter's value, and collect the names in predvars
local predvars ""
local predend = 207
forvalues t = `start_tm'/`predend' {
	foreach v in employment sepr sepnewr {
		qui gen x = `v' if tm == `t'
		bysort cty_fips: egen `v'_`t' = max(x)
		qui drop x
		local predvars "`predvars' `v'_`t'"
	}
}

* Add each outcome's county mean over the predictor window
foreach v in employment sepr sepnewr {
	bysort cty_fips: egen x = mean(`v') if inrange(tm, `start_tm', `predend')
	bysort cty_fips: egen mean_`v' = max(x)
	qui drop x
	local predvars "`predvars' mean_`v'"
}

* Snapshot of the predictor data (this tempfile is not read again in this script)
save "`core1'", replace

* "Covid- and bias-correct" the outcomes of interest
* Containers for the corrected outcomes
foreach v in employment sepr sepnewr {
	qui gen p_`v' = .
}

* For every outcome and every quarter, regress the outcome on the predictor
* set and the Covid index using donor counties only (weighted by pop10), and
* store the residual. predict is run without an if-condition, so residuals
* are also computed for the non-donor counties.
qui levelsof tm, local(quarters)
foreach v in employment sepr sepnewr {
	foreach t of local quarters {
		tempvar resid
		qui regress `v' `predvars' covid_index [aweight=pop10] if tm == `t' & donor == 1
		qui predict `resid', residuals
		qui replace p_`v' = `resid' if tm == `t'
		drop `resid'
	}
}

* Drop the predictor variables, keeping the raw and corrected outcomes
qui keep cty_fips tm naics employment sepr sepnewr p_* emp10 pop10 unr donor samp_allage

* Compress, order by county and quarter, and store as the working panel
qui compress
sort cty_fips tm
qui save "`core'", replace

* Get the synthetic control weights for each treated county
qui use "`core'", clear
qui levelsof cty_fips if donor == 0, local(trcty)

* Settings shared by every run: sample name, industry code, and the last
* quarter used as a predictor
local s = "allage"
local n = 7225
local predend = 207

* For every treated county `c' and outcome `y':
*   1. run allsynth on the treated county plus the donor pool and save its
*      results as tr_`c' in the NY folder;
*   2. read the donor weights back from that file and use them to average the
*      corrected outcome p_`y' over the donors of each unit;
*   3. save the corrected treated series, the corrected synthetic series, and
*      their gap as tr_`c' in the "corrected" subfolder.
foreach c of local trcty {
	foreach y in sepr sepnewr {

		* Re-load the data
		qui use "`core'", clear

		* Restrict to selected sample of counties observed for NAICS `n', and ensure a balanced panel
		qui keep if samp_`s' == 1
		qui keep if naics == `n'
		qui keep if cty_fips == `c' | donor == 1
		bysort cty_fips: gen N = _N
		qui keep if N == `tot_tm'
		drop N

		* Report how many treated and donor counties survive the restrictions
		qui distinct cty_fips if donor == 1
		local donorct = r(ndistinct)
		qui distinct cty_fips if inlist(cty_fips, `trctylist')
		di "Balanced panel exists for `r(ndistinct)' of `trcty_count' treated counties and `donorct' donor pool counties"

		* Create population-based weights for the averaging
		* NOTE(review): x and popwt are not used again in this script; they are
		* kept in the data handed to allsynth in case its output is expected to
		* carry them - confirm before removing
		qui gen x = pop10
		bysort cty_fips: egen popwt = max(x)

		* Create the list of pre-treatment outcome period predictors: the
		* outcome and emp10 in every single quarter of the predictor window,
		* followed by emp10 averaged over the whole window
		local ypreds
		local emp10preds
		forvalues t = `start_tm'/`predend' {
			local ypreds "`ypreds' `y'(`t')"
			local emp10preds "`emp10preds' emp10(`t')"
		}
		local emp10preds "`emp10preds' emp10(`start_tm'(1)`predend')"
		* Variable normalised together with the outcome in transform()
		local normemp "emp10"

		* tsset
		format tm %tq
		tsset cty_fips tm, quarterly
		qui gen treated = (donor == 0)
		* Treatment quarter for treated rows, 0 otherwise (taken from the trp
		* local rather than repeating its value)
		qui gen trp = `trp'*treated

		#delimit ;
			allsynth
				`y' `ypreds' `y'(`start_tm'(1)`predend') unr(`start_tm'(4)`predend') `emp10preds', 
				trunit(`c') trperiod(`trp') 
				bcorrect(merge) 
				transform(`y' `normemp', normalize)
				pvalues(rmspe)
				keep($projdir/dta/analysis/qwi/`y'/naics`n'/`s'/NY/tr_`c') replace;
			#delimit cr

		* Load the estimates data
		qui use "$projdir/dta/analysis/qwi/`y'/naics`n'/`s'/NY/tr_`c'", clear

		* Get the weights: keep the strictly positive ones, then rename so that
		* county_fips identifies the unit the weights belong to and cty_fips
		* identifies the weighted donor (ready to merge onto the panel)
		qui keep cty_fips _Co_Number _W_Weight
		qui keep if !mi(_W_Weight) & _W_Weight > 0
		qui rename cty_fips county_fips
		qui rename _Co_Number cty_fips
		qui levelsof county_fips, local(units)
		qui gettoken first : units
		qui save "`core2'", replace

		* For each unit, merge the weights into the core data. core4
		* accumulates the donor rows; on the first unit it is overwritten rather
		* than appended to, which discards the rows left by the previous run.
		foreach i of local units {
			qui use "`core2'", clear
			qui keep if county_fips == `i'
			qui save "`core3'", replace
			qui use "`core'", clear
			qui keep if naics == `n'
			qui keep cty_fips tm `y' p_`y'
			merge m:1 cty_fips using "`core3'", nogen norep keep(3)
			if `i' != `first' {
				qui append using "`core4'"
			}
			qui save "`core4'", replace
		}

		* Collapse to get the synthetic unit estimates for the treated unit and the donor pool units and save
		qui collapse (mean) p_`y'_synthetic=p_`y' [aw=_W_Weight], by(county_fips tm)
		qui rename county_fips cty_fips
		qui save "`core2'", replace

		* Merge into core data for the treated unit, and save
		qui use "`core'", clear
		qui keep if naics == `n'
		qui keep cty_fips tm p_`y' pop10
		qui rename p_`y' p_`y'_treated
		qui merge 1:1 cty_fips tm using "`core2'", nogen norep
		qui rename tm _time
		qui gen p_gap = p_`y'_treated - p_`y'_synthetic
		qui gen trunit = `c'
		qui gen trperiod = `trp'
		qui save "$projdir/dta/analysis/qwi/`y'/naics`n'/`s'/NY/corrected/tr_`c'", replace
	}
}

* Close the log
log close sublog
